This notebook computes and displays summary statistics for the generated patent databases.


In [23]:
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
# Report the number of records in each generated database.
# Each table is read from its own SQLite file, '<table>.sqlite3', which is
# expected to contain a table of the same name.
databases = ['assignee','citation','class','inventor','patent','patdesc','lawyer','sciref','usreldoc']
for table in databases:
    conn = sqlite3.connect('{0}.sqlite3'.format(table))
    try:
        cursor = conn.cursor()
        # Table names cannot be bound as SQL parameters; interpolating is
        # acceptable here because the names come from the literal list above.
        res = cursor.execute('select count(*) from {0}'.format(table))
        # count(*) always yields exactly one row holding one value.
        record_count = res.fetchone()[0]
    finally:
        # Close the connection even when the query fails (e.g. missing table).
        conn.close()
    # A single pre-formatted argument prints identically on Python 2 and 3.
    print('{0} : {1} records'.format(table, record_count))


assignee : 4903 records
citation : 175005 records
class : 19656 records
inventor : 13882 records
patent : 5292 records
patdesc : 5292 records
lawyer : 5878 records
sciref : 42358 records
usreldoc : 2608 records

Inventor Table Statistics

Histogram of the number of inventors per patent; the mean is printed below the plot.


In [51]:
# Collect, for every patent, the number of rows it has in the inventor table.
conn = sqlite3.connect('inventor.sqlite3')
try:
    cursor = conn.cursor()
    results = cursor.execute('select count(*) from inventor group by Patent;')
    # Each fetched row is a 1-tuple (count,), so flatten to a plain list.
    inventor_counts = [row[0] for row in results.fetchall()]
finally:
    # All data is in memory now; close before plotting so the connection is
    # released even if the query or the figure code raises.
    conn.close()

# Histogram of the per-patent inventor counts (matplotlib's default binning).
fig = plt.figure()
histogram = fig.add_subplot(111)
n, bins, patches = histogram.hist(inventor_counts)
histogram.set_xlabel('Number of Inventors')
histogram.set_ylabel('Patent Count')
plt.show()
# A single pre-formatted argument prints identically on Python 2 and 3.
print('Average Number of Inventors per Patent {0}'.format(np.mean(inventor_counts)))


Average Number of Inventors per Patent 2.62320483749

Citation Table Statistics

Histogram of the number of citations per patent; the mean is printed below the plot.


In [104]:
# Collect, for every patent, the number of rows it has in the citation table.
conn = sqlite3.connect('citation.sqlite3')
try:
    cursor = conn.cursor()
    results = cursor.execute('select count(*) from citation group by Patent;')
    # Each fetched row is a 1-tuple (count,), so flatten to a plain list.
    citation_counts = [row[0] for row in results.fetchall()]
finally:
    # All data is in memory now; close before plotting so the connection is
    # released even if the query or the figure code raises.
    conn.close()

fig = plt.figure()
histogram = fig.add_subplot(111)
histogram.set_xscale('log')
# Bin edges: unit steps 1, 2, 3, 4, followed by log-spaced edges running from
# 5**1 = 5 up to 5**5 = 3125 (np.logspace's default number of points).
# list(...) keeps .extend working on Python 3, where range() is not a list.
bins = list(range(1, 5))
bins.extend(np.logspace(1, 5, base=5))
# NOTE(review): counts above the last edge (3125) would fall outside every bin
# and so would not appear in the plot -- confirm this cap is intended.
n, bins, patches = histogram.hist(citation_counts, bins=bins, histtype='stepfilled')
histogram.set_xlabel('Number of Citations Cited per Patent')
histogram.set_ylabel('Patent Count')
plt.show()
# A single pre-formatted argument prints identically on Python 2 and 3.
print('Average Number of Citations per Patent {0}'.format(np.mean(citation_counts)))


Average Number of Citations per Patent 33.2709125475

Histogram with number of citations per patent, with mean


In [ ]: